{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-30T10:28:13.767549Z",
     "start_time": "2019-08-30T10:28:11.696149Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n",
      "/root/.local/lib/python3.5/site-packages/tensorflow/python/framework/dtypes.py:523: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
      "/root/.local/lib/python3.5/site-packages/tensorflow/python/framework/dtypes.py:524: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
      "/root/.local/lib/python3.5/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
      "/root/.local/lib/python3.5/site-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
      "/root/.local/lib/python3.5/site-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
      "/root/.local/lib/python3.5/site-packages/tensorflow/python/framework/dtypes.py:532: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
     ]
    }
   ],
   "source": [
    "#! -*- coding:utf-8 -*-\n",
    "import re, os, json, codecs, gc\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from random import choice\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.model_selection import KFold\n",
    "from keras_bert import load_trained_model_from_checkpoint, Tokenizer\n",
    "\n",
    "from keras.layers import *\n",
    "from keras.callbacks import *\n",
    "from keras.models import Model\n",
    "import keras.backend as K\n",
    "from keras.optimizers import Adam"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-30T10:28:14.074504Z",
     "start_time": "2019-08-30T10:28:13.769918Z"
    }
   },
   "outputs": [],
   "source": [
    "train_lines = codecs.open('../input/Train/Train_DataSet.csv').readlines()[1:]\n",
    "train_df = pd.DataFrame({\n",
    "            'id': [x[:32] for x in train_lines],\n",
    "            'ocr': [x[33:].strip() for x in train_lines]\n",
    "})\n",
    "train_label = pd.read_csv('../input/Train/Train_DataSet_Label.csv')\n",
    "train_df = pd.merge(train_df, train_label, on='id')\n",
    "\n",
    "test_lines = codecs.open('../input/Test_DataSet.csv').readlines()[1:]\n",
    "test_df = pd.DataFrame({\n",
    "            'id': [x[:32] for x in test_lines],\n",
    "            'ocr': [x[33:].strip() for x in test_lines]\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-30T10:28:21.257270Z",
     "start_time": "2019-08-30T10:28:21.150859Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#! -*- coding:utf-8 -*-\n",
    "import re, os, json, codecs, gc\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from random import choice\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.model_selection import KFold\n",
    "from keras_bert import load_trained_model_from_checkpoint, Tokenizer\n",
    "\n",
    "from keras.layers import *\n",
    "from keras.callbacks import *\n",
    "from keras.models import Model\n",
    "import keras.backend as K\n",
    "from keras.optimizers import Adam\n",
    "\n",
    "maxlen = 512\n",
    "config_path = '/export/home/liuyuzhong/kaggle/bert/chinese_wwm_ext_L-12_H-768_A-12/bert_config.json'\n",
    "# checkpoint_path = '/export/home/liuyuzhong/kaggle/bert/chinese_L-12_H-768_A-12/bert_model.ckpt'\n",
    "checkpoint_path = '/export/home/liuyuzhong/kaggle/bert/chinese_wwm_ext_L-12_H-768_A-12/bert_model.ckpt'\n",
    "dict_path = '/export/home/liuyuzhong/kaggle/bert/chinese_wwm_ext_L-12_H-768_A-12/vocab.txt'\n",
    "\n",
    "token_dict = {}\n",
    "with codecs.open(dict_path, 'r', 'utf8') as reader:\n",
    "    for line in reader:\n",
    "        token = line.strip()\n",
    "        token_dict[token] = len(token_dict)\n",
    "\n",
    "class OurTokenizer(Tokenizer):\n",
    "    def _tokenize(self, text):\n",
    "        R = []\n",
    "        for c in text:\n",
    "            if c in self._token_dict:\n",
    "                R.append(c)\n",
    "            elif self._is_space(c):\n",
    "                R.append('[unused1]') # space类用未经训练的[unused1]表示\n",
    "            else:\n",
    "                R.append('[UNK]') # 剩余的字符是[UNK]\n",
    "        return R\n",
    "\n",
    "tokenizer = OurTokenizer(token_dict)\n",
    "\n",
    "def seq_padding(X, padding=0):\n",
    "    L = [len(x) for x in X]\n",
    "    ML = max(L)\n",
    "    return np.array([\n",
    "        np.concatenate([x, [padding] * (ML - len(x))]) if len(x) < ML else x for x in X\n",
    "    ])\n",
    "\n",
    "class data_generator:\n",
    "    def __init__(self, data, batch_size=8, shuffle=True):\n",
    "        self.data = data\n",
    "        self.batch_size = batch_size\n",
    "        self.shuffle = shuffle\n",
    "        self.steps = len(self.data) // self.batch_size\n",
    "        if len(self.data) % self.batch_size != 0:\n",
    "            self.steps += 1\n",
    "    def __len__(self):\n",
    "        return self.steps\n",
    "    def __iter__(self):\n",
    "        while True:\n",
    "            idxs = list(range(len(self.data)))\n",
    "            \n",
    "            if self.shuffle:\n",
    "                np.random.shuffle(idxs)\n",
    "            \n",
    "            X1, X2, Y = [], [], []\n",
    "            for i in idxs:\n",
    "                d = self.data[i]\n",
    "                text = d[0][:maxlen]\n",
    "                x1, x2 = tokenizer.encode(first=text)\n",
    "                y = d[1]\n",
    "                X1.append(x1)\n",
    "                X2.append(x2)\n",
    "                Y.append([y])\n",
    "                if len(X1) == self.batch_size or i == idxs[-1]:\n",
    "                    X1 = seq_padding(X1)\n",
    "                    X2 = seq_padding(X2)\n",
    "                    Y = seq_padding(Y)\n",
    "                    yield [X1, X2], Y[:, 0, :]\n",
    "                    [X1, X2, Y] = [], [], []\n",
    "\n",
    "from keras.metrics import top_k_categorical_accuracy\n",
    "def acc_top2(y_true, y_pred):\n",
    "    return top_k_categorical_accuracy(y_true, y_pred, k=2)\n",
    "                    \n",
    "def build_bert(nclass):\n",
    "    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)\n",
    "\n",
    "    for l in bert_model.layers:\n",
    "        l.trainable = True\n",
    "\n",
    "    x1_in = Input(shape=(None,))\n",
    "    x2_in = Input(shape=(None,))\n",
    "\n",
    "    x = bert_model([x1_in, x2_in])\n",
    "    x = Lambda(lambda x: x[:, 0])(x)\n",
    "    p = Dense(nclass, activation='softmax')(x)\n",
    "\n",
    "    model = Model([x1_in, x2_in], p)\n",
    "    model.compile(loss='categorical_crossentropy', \n",
    "                  optimizer=Adam(1e-5),\n",
    "                  metrics=['accuracy', acc_top2])\n",
    "    print(model.summary())\n",
    "    return model\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-30T10:28:22.788539Z",
     "start_time": "2019-08-30T10:28:22.533994Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from keras.utils import to_categorical\n",
    "\n",
    "DATA_LIST = []\n",
    "for data_row in train_df.iloc[:].itertuples():\n",
    "    DATA_LIST.append((data_row.ocr, to_categorical(data_row.label, 3)))\n",
    "DATA_LIST = np.array(DATA_LIST)\n",
    "\n",
    "DATA_LIST_TEST = []\n",
    "for data_row in test_df.iloc[:].itertuples():\n",
    "    DATA_LIST_TEST.append((data_row.ocr, to_categorical(0, 3)))\n",
    "DATA_LIST_TEST = np.array(DATA_LIST_TEST)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-30T10:28:24.868418Z",
     "start_time": "2019-08-30T10:28:24.847638Z"
    }
   },
   "outputs": [],
   "source": [
    "def run_cv(nfold, data, data_label, data_test):\n",
    "    kf = KFold(n_splits=nfold, shuffle=True, random_state=520).split(data)\n",
    "    train_model_pred = np.zeros((len(data), 3))\n",
    "    test_model_pred = np.zeros((len(data_test), 3))\n",
    "\n",
    "    for i, (train_fold, test_fold) in enumerate(kf):\n",
    "        X_train, X_valid, = data[train_fold, :], data[test_fold, :]\n",
    "        \n",
    "        model = build_bert(3)\n",
    "        early_stopping = EarlyStopping(monitor='val_acc', patience=3)\n",
    "        plateau = ReduceLROnPlateau(monitor=\"val_acc\", verbose=1, mode='max', factor=0.5, patience=2)\n",
    "        checkpoint = ModelCheckpoint('./bert_dump/' + str(i) + '.hdf5', monitor='val_acc', \n",
    "                                         verbose=2, save_best_only=True, mode='max',save_weights_only=True)\n",
    "        \n",
    "        train_D = data_generator(X_train, shuffle=True)\n",
    "        valid_D = data_generator(X_valid, shuffle=True)\n",
    "        test_D = data_generator(data_test, shuffle=False)\n",
    "        \n",
    "        model.fit_generator(\n",
    "            train_D.__iter__(),\n",
    "            steps_per_epoch=len(train_D),\n",
    "            epochs=5,\n",
    "            validation_data=valid_D.__iter__(),\n",
    "            validation_steps=len(valid_D),\n",
    "            callbacks=[early_stopping, plateau, checkpoint],\n",
    "        )\n",
    "        \n",
    "        # model.load_weights('./bert_dump/' + str(i) + '.hdf5')\n",
    "        \n",
    "        # return model\n",
    "        train_model_pred[test_fold, :] =  model.predict_generator(valid_D.__iter__(), steps=len(valid_D),verbose=1)\n",
    "        test_model_pred += model.predict_generator(test_D.__iter__(), steps=len(test_D),verbose=1)\n",
    "        \n",
    "        del model; gc.collect()\n",
    "        K.clear_session()\n",
    "        \n",
    "        # break\n",
    "        \n",
    "    return train_model_pred, test_model_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-30T10:29:16.816348Z",
     "start_time": "2019-08-30T10:28:25.966794Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "__________________________________________________________________________________________________\n",
      "Layer (type)                    Output Shape         Param #     Connected to                     \n",
      "==================================================================================================\n",
      "input_1 (InputLayer)            (None, None)         0                                            \n",
      "__________________________________________________________________________________________________\n",
      "input_2 (InputLayer)            (None, None)         0                                            \n",
      "__________________________________________________________________________________________________\n",
      "model_2 (Model)                 (None, None, 768)    101677056   input_1[0][0]                    \n",
      "                                                                 input_2[0][0]                    \n",
      "__________________________________________________________________________________________________\n",
      "lambda_1 (Lambda)               (None, 768)          0           model_2[1][0]                    \n",
      "__________________________________________________________________________________________________\n",
      "dense_1 (Dense)                 (None, 3)            2307        lambda_1[0][0]                   \n",
      "==================================================================================================\n",
      "Total params: 101,679,363\n",
      "Trainable params: 101,679,363\n",
      "Non-trainable params: 0\n",
      "__________________________________________________________________________________________________\n",
      "None\n",
      "Epoch 1/5\n"
     ]
    },
    {
     "ename": "InvalidArgumentError",
     "evalue": "Incompatible shapes: [8,514,768] vs. [8,512,768]\n\t [[{{node model_2/Embedding-Position/add}} = Add[T=DT_FLOAT, _class=[\"loc:@training/Adam/gradients/model_2/Embedding-Position/add_grad/Reshape\"], _device=\"/job:localhost/replica:0/task:0/device:GPU:0\"](model_2/Embedding-Token-Segment/add, model_2/Embedding-Position/Tile)]]\n\t [[{{node metrics/acc_top2/Mean_1/_3259}} = _Recv[client_terminated=false, recv_device=\"/job:localhost/replica:0/task:0/device:CPU:0\", send_device=\"/job:localhost/replica:0/task:0/device:GPU:0\", send_device_incarnation=1, tensor_name=\"edge_21926_metrics/acc_top2/Mean_1\", tensor_type=DT_FLOAT, _device=\"/job:localhost/replica:0/task:0/device:CPU:0\"]()]]",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mInvalidArgumentError\u001b[0m                      Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-6-afa2e68f43be>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtrain_model_pred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_model_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrun_cv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mDATA_LIST\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mDATA_LIST_TEST\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-5-ffbe8c5c22d5>\u001b[0m in \u001b[0;36mrun_cv\u001b[0;34m(nfold, data, data_label, data_test)\u001b[0m\n\u001b[1;32m     23\u001b[0m             \u001b[0mvalidation_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalid_D\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__iter__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     24\u001b[0m             \u001b[0mvalidation_steps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalid_D\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m             \u001b[0mcallbacks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mearly_stopping\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mplateau\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcheckpoint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     26\u001b[0m         )\n\u001b[1;32m     27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.5/site-packages/keras/legacy/interfaces.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     89\u001b[0m                 warnings.warn('Update your `' + object_name + '` call to the ' +\n\u001b[1;32m     90\u001b[0m                               'Keras 2 API: ' + signature, stacklevel=2)\n\u001b[0;32m---> 91\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     92\u001b[0m         \u001b[0mwrapper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_original_function\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     93\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.5/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit_generator\u001b[0;34m(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)\u001b[0m\n\u001b[1;32m   1416\u001b[0m             \u001b[0muse_multiprocessing\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0muse_multiprocessing\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1417\u001b[0m             \u001b[0mshuffle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshuffle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1418\u001b[0;31m             initial_epoch=initial_epoch)\n\u001b[0m\u001b[1;32m   1419\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1420\u001b[0m     \u001b[0;34m@\u001b[0m\u001b[0minterfaces\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlegacy_generator_methods_support\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.5/site-packages/keras/engine/training_generator.py\u001b[0m in \u001b[0;36mfit_generator\u001b[0;34m(model, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)\u001b[0m\n\u001b[1;32m    215\u001b[0m                 outs = model.train_on_batch(x, y,\n\u001b[1;32m    216\u001b[0m                                             \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 217\u001b[0;31m                                             class_weight=class_weight)\n\u001b[0m\u001b[1;32m    218\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    219\u001b[0m                 \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mto_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mouts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.5/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mtrain_on_batch\u001b[0;34m(self, x, y, sample_weight, class_weight)\u001b[0m\n\u001b[1;32m   1215\u001b[0m             \u001b[0mins\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0msample_weights\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1216\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_train_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1217\u001b[0;31m         \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1218\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0munpack_singleton\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1219\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m   2713\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_legacy_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2714\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2715\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2716\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2717\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mpy_any\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mis_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m   2673\u001b[0m             \u001b[0mfetched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_callable_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0marray_vals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_metadata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2674\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2675\u001b[0;31m             \u001b[0mfetched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_callable_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0marray_vals\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2676\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mfetched\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2677\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.local/lib/python3.5/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1437\u001b[0m           ret = tf_session.TF_SessionRunCallable(\n\u001b[1;32m   1438\u001b[0m               \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1439\u001b[0;31m               run_metadata_ptr)\n\u001b[0m\u001b[1;32m   1440\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1441\u001b[0m           \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.local/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py\u001b[0m in \u001b[0;36m__exit__\u001b[0;34m(self, type_arg, value_arg, traceback_arg)\u001b[0m\n\u001b[1;32m    526\u001b[0m             \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    527\u001b[0m             \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc_api\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_Message\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 528\u001b[0;31m             c_api.TF_GetCode(self.status.status))\n\u001b[0m\u001b[1;32m    529\u001b[0m     \u001b[0;31m# Delete the underlying status object from memory otherwise it stays alive\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    530\u001b[0m     \u001b[0;31m# as there is a reference to status from this from the traceback due to\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mInvalidArgumentError\u001b[0m: Incompatible shapes: [8,514,768] vs. [8,512,768]\n\t [[{{node model_2/Embedding-Position/add}} = Add[T=DT_FLOAT, _class=[\"loc:@training/Adam/gradients/model_2/Embedding-Position/add_grad/Reshape\"], _device=\"/job:localhost/replica:0/task:0/device:GPU:0\"](model_2/Embedding-Token-Segment/add, model_2/Embedding-Position/Tile)]]\n\t [[{{node metrics/acc_top2/Mean_1/_3259}} = _Recv[client_terminated=false, recv_device=\"/job:localhost/replica:0/task:0/device:CPU:0\", send_device=\"/job:localhost/replica:0/task:0/device:GPU:0\", send_device_incarnation=1, tensor_name=\"edge_21926_metrics/acc_top2/Mean_1\", tensor_type=DT_FLOAT, _device=\"/job:localhost/replica:0/task:0/device:CPU:0\"]()]]"
     ]
    }
   ],
   "source": [
    "train_model_pred, test_model_pred = run_cv(10, DATA_LIST, None, DATA_LIST_TEST)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-30T10:18:41.879503Z",
     "start_time": "2019-08-30T10:18:41.776790Z"
    }
   },
   "outputs": [],
   "source": [
    "test_pred = [np.argmax(x) for x in test_model_pred]\n",
    "test_df['label'] = test_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-30T10:18:42.211656Z",
     "start_time": "2019-08-30T10:18:42.185316Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "test_df[['id', 'label']].to_csv('baseline3.csv', index=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-30T10:20:57.406209Z",
     "start_time": "2019-08-30T10:20:57.399812Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# np.save('bert_test_maxlen400_bs16_train.npy', train_model_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
